from urllib2 import Request, urlopen, URLError, HTTPError
from time import sleep
from itertools import izip
import requests
import sys

sys.path.append('../deob/')
import utils


# This class emulates a C-style switch/case statement, including
# fall-through, on top of a one-shot ``for`` loop. You only need to read
# its internals if you want to know how it works; it is defined once here
# and used as-is.
class switch(object):
    """C-style switch/case emulation driven by a one-shot ``for`` loop.

    Usage::

        for case in switch(value):
            if case(1):
                ...
                break
            if case(2, 3):
                ...
                break
            if case():  # default
                ...
    """

    def __init__(self, value):
        """Remember the value that the cases will be compared against."""
        self.value = value
        # Set once a case has matched: every later case then matches too
        # (fall-through) until the caller breaks out of the loop.
        self.fall = False

    def __iter__(self):
        """Yield the match method exactly once, then stop."""
        # Letting the generator finish normally ends the loop. An explicit
        # ``raise StopIteration`` here is converted into a RuntimeError
        # under PEP 479 whenever the loop body completes without ``break``.
        yield self.match

    def match(self, *args):
        """Indicate whether or not to enter a case suite.

        Returns True when called with no arguments (the default case),
        when an earlier case already matched (fall-through), or when the
        switched value is one of ``args``; returns False otherwise.
        """
        if self.fall or not args:
            return True
        if self.value in args:
            self.fall = True
            return True
        return False


class SafeBrowsing:
	"""Client for the Safe Browsing lookup endpoint (the ``pver=3.0`` URL below).

	URLs can be checked one at a time with a GET request (getReq) or in
	batches with POST requests (postReq / postHandle). postReqFile and
	postReqFiles drive the batch lookups from input files and write the
	results to output files.
	"""

	# Lookup endpoint shared by the GET and POST requests.
	# NOTE(review): the API key is embedded in source; consider loading it
	# from configuration instead.
	LOOKUP_URL = "https://sb-ssl.google.com/safebrowsing/api/lookup?client=api&apikey=ABQIAAAAByTIeu_ibQahWWvoHB3rChT1i2MeTIrZeMvk-RNEUyMjIzvYvw&appver=1.0&pver=3.0"

	def __init__(self, urlPerPost=500):
		"""Create a client that sends at most ``urlPerPost`` URLs per POST."""
		self.urlPerPost = urlPerPost

	# The safebrowsing v2 is not implemented here.

	def getReq(self, lookupurl):
		"""Look up a single URL with a GET request and print the outcome.

		On success the response body is printed. On an HTTP error the
		status code and error body are printed; on any other URL error
		(e.g. the connection failed) the reason is printed. Returns None
		in every case.
		"""
		# Local import: the file's top-level imports do not provide quote().
		from urllib import quote
		# The looked-up URL must be fully percent-encoded inside the query.
		url = self.LOOKUP_URL + "&url=" + quote(lookupurl, safe='')
		try:
			f = urlopen(Request(url))
		except HTTPError as e:
			# HTTPError must be caught first: it is a subclass of URLError
			# and the only one carrying a status code and a body.
			print(e.code)
			print(e.read())
			return
		except URLError as e:
			print(e.reason)
			return
		try:
			print(f.read())
		finally:
			f.close()

	def postReq(self, lookupurls):
		"""POST a batch of URLs to the lookup endpoint.

		The request body is the number of URLs on the first line followed
		by one URL per line. Returns the ``requests`` response object.
		"""
		data = "{0}\n{1}".format(len(lookupurls), "\n".join(lookupurls))
		return requests.post(self.LOOKUP_URL, data)

	def postHandle(self, lookupurls, outf, fail_outf):
		"""Post one batch of URLs and record the outcome.

		Results are written to ``outf`` as ``url,label`` lines. URLs whose
		request was rejected are written to ``fail_outf``. A 503 response
		is retried after an hour; a 413 response splits the batch in two
		and handles each half recursively. Returns None.
		"""
		if not lookupurls:
			# Nothing to look up; do not send an empty request.
			return
		while True:
			response = self.postReq(lookupurls)
			status = response.status_code
			if status == 200:
				# AT LEAST ONE of the queried URLs matched the phishing or
				# malware lists; the response body carries one label per URL.
				labels = response.text.split("\n")
				results = [url.decode('utf-8') + ',' + label
						for url, label in izip(lookupurls, labels)]
				outf.write("\n".join(results).encode('utf-8') + "\n")
			elif status == 204:
				# NONE of the queried URLs matched; no response body.
				results = [url + ',ok' for url in lookupurls]
				outf.write("\n".join(results) + "\n")
			elif status == 400:
				# Bad Request - the HTTP request was not correctly formed.
				print("Bad Request.")
				# Log the failed urls.
				fail_outf.write("\n".join(lookupurls) + "\n")
			elif status == 401:
				# Not Authorized - the apikey is not authorized.
				print("Error: apikey not authorized.")
			elif status == 503:
				# Service Unavailable - besides normal server failures this
				# can mean the client is being throttled. Wait, then retry.
				print("Service Unavailable.")
				sleep(60 * 60)  # 60 * 60 * 24?
				continue
			elif status == 413:
				# Request too large.
				if len(lookupurls) < 2:
					# A single URL cannot be split any further; splitting
					# it again would recurse forever.
					print("Request too large for a single URL.")
					fail_outf.write("\n".join(lookupurls) + "\n")
				else:
					print("Request too large. Recursively handling.")
					mid = len(lookupurls) // 2
					self.postHandle(lookupurls[:mid], outf, fail_outf)
					self.postHandle(lookupurls[mid:], outf, fail_outf)
			else:
				print("status_code {0} not valid.".format(status))
			return

	def postReqFile(self, infile, outfile):
		"""Look up every URL listed in ``infile`` and write results to ``outfile``.

		Each input line is split on ``','`` and its second field is taken
		as the URL (a line without that separator raises IndexError).
		Reading stops at the first blank line or at end of file. URLs are
		posted in batches of ``self.urlPerPost``. Rejected URLs go to
		``outfile + '.failed'``. All three files are closed on exit, even
		if an error occurs.
		"""
		with open(infile, 'r') as inf:
			with open(outfile, 'w') as outf:
				with open(outfile + '.failed', 'w') as fail_outf:
					batch = []
					for line in inf:
						if line == '\n':
							# A blank line ends the usable part of the input.
							break
						batch.append(line.split("','")[1])
						if len(batch) >= self.urlPerPost:
							self.postHandle(batch, outf, fail_outf)
							batch = []
					# Flush the final, partially filled batch (if any).
					if batch:
						self.postHandle(batch, outf, fail_outf)

	def postReqFiles(self, inregex, indir, outdir):
		"""Run postReqFile for every input file selected by ``inregex``.

		Only the base name of each path returned by ``utils.lsresult`` is
		used: it is read from ``indir`` and the result is written under the
		same name in ``outdir`` (both are joined by plain concatenation, so
		they need a trailing slash).
		NOTE(review): presumably lsresult returns the paths matching the
		pattern -- confirm against utils.
		"""
		for path in utils.lsresult(inregex):
			infile = path.split('/')[-1]
			print("Processing {0}".format(infile))
			self.postReqFile(indir + infile, outdir + infile)

if __name__ == "__main__":
	# Script entry point: run the batch lookup over every malurl-20* input
	# file and write the results under the SafeBrowsing output directory.
	#
	# Disabled manual check against two test URLs:
	#   urls = ["http://www.google.com/", "http://ianfette.org/"]
	#   response = sb.postReq(urls)
	#   print response.text
	#   print response.status_code
	#
	# Disabled manual check against a single file:
	#   infile = '../../data/TempData/URLs/malurl-2013-05-20'
	#   outfile = '../../data/TempData/SafeBrowsing/malurl-2013-05-20'
	#   sb.postReqFile(infile, outfile)
	checker = SafeBrowsing()
	checker.postReqFiles(
		inregex='../../data/TempData/URLs/malurl-20*',
		indir='../../data/TempData/URLs/',
		outdir='../../data/TempData/SafeBrowsing/',
	)


